***************************************************************************************************
***************************************************************************************************
***************************************************************************************************


/**************************************************************************************************
Datacleaning for Martini and Uruena (2024) Can role models influence female's decision to participate 
in the labor market? Evidence from a field experiment
**************************************************************************************************/



* ---- Session setup ---------------------------------------------------------
* Start from an empty session and switch off output paging for this run.
clear all
set more off

* Root folder of the replication package. Paths below are built by appending
* to this global, so keep the trailing slash. Adjust to the local machine.
global Directory "/Users/.../Replication/"



/**** Cleaning and Transforming Data from the Experiment ********/



*EXPERIMENT:

* Work from the raw-data folder and load the experiment data.
cd "${Directory}Data/raw"

use "schoolexp.dta", clear
* Paging is already off for the whole session (see the top of the file). The
* former "set more off, perm" is dropped on purpose: the perm option rewrites
* the user's saved Stata preferences, a side effect a replication script
* should not have.




* District variable: build a numeric, value-labelled city code from the
* string variable district.
encode district, generate(city)

label variable city "Location of School"
order city, after(district)

* Swap codes 2 and 3 (codes 1 and 4 stay as they are).
recode city (1=1) (2=3) (3=2) (4=4)

* encode attached a value label (named city, after the new variable) that was
* built for the codes BEFORE the recode. recode does not touch value labels,
* so without the swap below codes 2 and 3 would each display the other
* district's name. Exchange the two label texts so labels follow the codes.
local city_txt2 : label city 2
local city_txt3 : label city 3
label define city 2 "`city_txt3'" 3 "`city_txt2'", modify


** Creating a participant id

rename treat_video treatment
label variable treatment "Treatment"
* Video arm 3 is recoded to 0.
recode treatment (3=0)

rename treat_job subtreatment
label variable subtreatment "Subtreatment"

* Concatenate the player, city, treatment and subtreatment codes into one
* string, then left-pad with zeros to five characters (2401 becomes 02401).
gen id= string(player) + string(city) + string(treatment) + string(subtreatment) 

gen id_1 = string(real(id),"%05.0f")

drop id

rename id_1 id

order id, after(end)


** Renaming variables: old names in the first list, new names in the second
rename (social_decision motivated_character gender av_grade) ///
       (take_advantage  motivated_video     female grade_app)

* Convert the string luck_result into the numeric variable luck and keep it in
* the same column position as the string it replaces.
encode luck_result, generate(luck)
order luck, after(luck_result)
drop luck_result


** Variable labels for the experiment data, grouped by topic

* Session structure and luck game
label var group   "Group number"
label var session "Session number"
label var take_advantage "Take advantage"
label var luck    "Luck in Game"

* Perception of the video character
label var know_character     "Knows character"
label var relation_character "Participants's relation to character"
label var motivated_video    "Participant's motivation after video"
label var like_character     "Participant likes character"
label var identify_character "Identifies with character"
label var success_character  "Character is successful"
label var risk_character     "Character is not afraid of risks"
label var future_character   "Can be as successful as character"

* Aspiration questions: education
label var education_max      "Maximum education in years in city or village"
label var education_min      "Minimum education in years in city or village"
label var education_achieved "Years of education present"
label var education_goal     "Years of education desired"
label var education_exp      "Years of education expected"

* Aspiration questions: income
label var income_max      "Maximum Income per month in city or village(Ariary)"
label var income_min      "Minimum Income per month in city or village (Ariary)"
label var income_achieved "Income per month present(Ariary)"
label var income_goal     "Income per month desired(Ariary)"

* Aspiration questions: social status
label var status_max      "Maximum social status in city or village"
label var status_min      "Minimum social status in city or village"
label var status_achieved "Social status at present"
label var status_goal     "Social status desired"
label var status_exp      "Social status expected"

* Aspiration questions: assets
label var asset_max      "Maximum level of assets in city or village (Ariary)"
label var asset_min      "Minimum level of assets in city or village (Ariary)"
label var asset_achieved "Present level of assets"
label var asset_goal     "Level of assets desired"
label var asset_exp      "Level of assets expected"

* Weights given to the four aspiration dimensions
label var weight_edu    "Weight education"
label var weight_inc    "Weight income"
label var weight_social "Weight social"
label var weight_assets "Weight assets"
label var total_weight  "Total weight"

* Tasks: answers given and performance beliefs exist for tasks 1, 2, 3 and 5
foreach t of numlist 1 2 3 5 {
	label var answered_`t' "Number of answers task `t'"
	label var belief_`t'   "Belief performance task `t'"
}

* Tasks: performance
label var ptask_1 "Perf. 1"
label var ptask_2 "Perf. 2"
label var ptask_3 "Performance task 3"
label var ptask_5 "Performance task 5"

* Tasks: decision to compete
label var decision_3 "Compete 3"
label var decision_4 "Decision to compete task 4"
label var decision_5 "Compete 5"

* Tasks: winners
foreach t of numlist 2 3 5 {
	label var winner_`t' "Winner task `t'"
}
label var winner_4 "Winner Task 4"

* Ranks and beliefs about the other participants
forvalues t = 1/2 {
	label var belief_rank_`t' "Belief rank task `t'"
	label var rank_`t'        "Rank task `t'"
}
label var belief_pother  "Belief average performance others"
label var pothers        "Average performance"
label var belief_compete "Belief decision to compete others task 3"
label var number_compete "Decision to compete others task 3"
label var earnings_activity "Payment recieved"

* Job offer and application form
label var interest_job      "Job interest"
label var position_job      "Position of interest"
label var female            "Female"
label var complete_app      "Complete application"
label var grade_app         "Average grade application"
label var level             "Level of studies"
label var english_level     "English level"
label var french_level      "French level"
label var knowledge_project "Knowledge project"
label var expect_salary     "Salary expectation"
label var work_experience   "Work experience"


*** Value labels and simple recodes

* Treatment arm, job subtreatment and session
label define mvideo 0 "Control" 1 "Woman video" 2 "Man video", replace
label define mjob 1 "Public" 2 "Private"
label define msession 1 "morning session" 2 "afternoon session"
label values treatment mvideo
label values subtreatment mjob
label values session msession

* take_advantage: raw code 1 becomes 0 and raw code 9 becomes 1
recode take_advantage (1=0) (9=1)
label define mtake 0 "does not take advantage" 1 "takes advantage"
label values take_advantage mtake

* luck: encoded code 1 becomes 0 and code 2 becomes 1
recode luck (1=0) (2=1)
label define mluck 0 "bad luck" 1 "good luck"
label values luck mluck

* Closeness to the video character
label define mrelation 1 "not close at all" 2 "not close" 3 "neutral" 4 "close" 5 "very close"
label values relation_character mrelation

* One agreement scale shared by all agree/disagree items
label define magree 1 "strongly disagree" 2 "disagree" 3 "neither agree nor disagree" ///
	4 "agree" 5 "strongly agree"
label values motivated_video like_character identify_character ///
	success_character risk_character future_character magree

* A code of 0 on these four items is turned into missing
foreach item of varlist relation_character like_character identify_character future_character {
	replace `item' = . if `item' == 0
}

* Yes/no and winner/loser labels
label define myes 0 "No" 1 "Yes"
label values know_character myes

label define mwinner 0 "loser" 1 "winner"
label values winner_2 mwinner


* Beliefs about own performance: inspect the distributions, fix one entry
tab1 belief_1, missing
replace belief_1 = 10 if belief_1 == 100 //typo
tab1 belief_2 belief_3 belief_5, missing

* Beliefs about own rank
tab belief_rank_1
tab1 belief_rank_1 belief_rank_2, missing
* A stated rank of 6 is set to 4, the lowest rank
replace belief_rank_2 = 4 if belief_rank_2 == 6 //lowest rank
* Coding mistake for one participant
replace belief_rank_2 = 3 if id == "02401"

* Actual ranks
tab1 rank_1 rank_2, missing

* Performance (should not be above 10)
tab1 ptask_1 ptask_2 ptask_3 ptask_5, missing


** Competition decisions of tasks 3, 4 and 5
* Each decision is stored as a string. It is encoded into a temporary numeric
* variable, code 2 is set to 0, and the numeric version takes over the name
* and column position of the string. The winner flag of the same task gets
* its value label in the same pass.
label define mdecision 0 "no compete" 1 "compete"
foreach t of numlist 3 4 5 {
	encode decision_`t', gen(decision_`t'1)
	recode decision_`t'1 (2=0)
	order decision_`t'1, after(decision_`t')
	drop decision_`t'
	rename decision_`t'1 decision_`t'
	label values decision_`t' mdecision
	label values winner_`t' mwinner
}

** Interest in the job: encoded code 1 becomes 0 and code 2 becomes 1
encode interest_job, gen(interest_job1)
recode interest_job1 (1=0) (2=1)
order interest_job1, after(interest_job)
drop interest_job
rename interest_job1 interest_job
label values interest_job myes

** Position of interest: code 1 becomes missing, code 2 becomes 0, code 3 becomes 1
encode position_job, gen(position_job1)
recode position_job1 (1=.) (2=0) (3=1)
order position_job1, after(position_job)
drop position_job
rename position_job1 position_job
label define mposition 0 "Assistant" 1 "Coordinator"
label values position_job mposition

** Female indicator: code 2 becomes 0, code 1 stays 1
encode female, gen(female1)
recode female1 (2=0)
order female1, after(female)
drop female
rename female1 female
label define mfemale 0 "Male" 1 "Female"
label values female mfemale

** complete_app is only recorded for participants who were interested and applied
codebook complete_app
label values complete_app myes
* Missing values stand for participants who did not apply
replace complete_app = 0 if complete_app == .

*** Level of studies: encode the string and attach the track labels
label define mlevel 1 "A1" 2 "A2" 3 "C" 4 "D"
encode level, gen(level1)
order level1, after(level)
drop level
rename level1 level
label values level mlevel

*** Focus dummies from the application form
* Every dummy is defined for level codes 1 to 4 only and is missing otherwise.

* Broad focus: science = codes 3 and 4, language = codes 1 and 2
gen science_focus_app  = inlist(level, 3, 4) if inlist(level, 1, 2, 3, 4)
gen language_focus_app = inlist(level, 1, 2) if inlist(level, 1, 2, 3, 4)
order language_focus_app science_focus_app, after(level)
label variable science_focus_app  "Science focus Application Form"
label variable language_focus_app "Language focus Application Form"

* One dummy per single track
gen science_focus_high_app  = (level == 3) if inlist(level, 1, 2, 3, 4)
gen science_focus_low_app   = (level == 4) if inlist(level, 1, 2, 3, 4)
gen language_focus_high_app = (level == 1) if inlist(level, 1, 2, 3, 4)
gen language_focus_low_app  = (level == 2) if inlist(level, 1, 2, 3, 4)
label variable science_focus_high_app  "Science focus high Application Form"
label variable science_focus_low_app   "Science focus low Application Form"
label variable language_focus_high_app "Language focus high Application Form"
label variable language_focus_low_app  "Language focus low Application Form"

*** Self-rated language skills and yes/no items
label define mrating 1 "not good" 2 "so so" 3 "good" 4 "very good"
label values english_level french_level mrating
label values knowledge_project work_experience myes



***ASPIRATION QUESTIONS
* The five education answers are stored as strings. Each one is handled the
* same way: encode gives every distinct string an integer code, recode maps
* the codes to numbers, the value label created by encode is dropped, and the
* numeric variable takes over the name and position of the string variable.
* NOTE(review): every recode table below only fits the exact set of raw
* strings in schoolexp.dta, because encode numbers them in sorted order.
* Re-check the tables if the raw data change.


*education_max
* NOTE(review): code 1 has no rule here and therefore stays 1 -- confirm that
* this is intended.

encode education_max, gen(education_max1)
recode education_max1 (2=12.5) (3=14) (4=16) (5=17) (6=9.5) (7=16) (8=5.5) (9=22) /// 
(10=16) (11=16) (12=18) (13=18) (14=16) (15=5) (16=7) (17=9) (18=12) (19=12)


label drop education_max1
order education_max1, after(education_max)
drop education_max
rename education_max1 education_max
label var education_max "Education max"

*education_min
* Code 1 is set to missing.

encode education_min, gen(education_min1)

recode education_min1 (0=0) (1=.) (2=0) (3=12.5) (4=9.5) (5=5.5) (6=5.5) (7=18) (8=1) ///
(9=11) (10=2) (11=3) (12=4) (13=5) (14=6) (15=7) (16=8) (17=9) (18=12) (19=12)

label drop education_min1
order education_min1, after(education_min)
drop education_min
rename education_min1 education_min
label var education_min "Education min"


*education_achieved
* Only four codes are mapped: 1 becomes 15, codes 2 to 4 become 12.

encode education_achieved, gen(education_achieved1)

recode education_achieved1 (1=15) (2=12) (3=12) (4=12)


label drop education_achieved1
order education_achieved1, after(education_achieved)
drop education_achieved
rename education_achieved1 education_achieved
label var education_achieved "Current education level"



*education_goal
* Code 1 is set to missing.

encode education_goal, gen(education_goal1)

recode education_goal1 (1=.) (2=12.5) (3=13) (4=22) (5=24) (6=14) (7=15) (8=16) ///
(9=17) (10=18) (11=19) (12=20) (13=21) (14=22) (15=22) (16=16) (17=16) (18=18) (19=18) ///
(20=18) (21=18) (22=16) (23=22) (24=12) (25=22)
label drop education_goal1
order education_goal1, after(education_goal)
drop education_goal
rename education_goal1 education_goal
label var education_goal "Education aspiration"




*education_exp
* Code 1 is set to missing.

encode education_exp, gen(education_exp1)

recode education_exp1 (1=.) (2=6) (3=8) (4=12.5) (5=13) (6=22) (7=24) (8=14) (9=15) ///
(10=16) (11=17) (12=18) (13=19) (14=20) (15=22) (16=22) (17=22) (18=16) (19=16) (20=18) ///
(21=18) (22=18) (23=18) (24=18) (25=22) (26=22) (27=12)

label drop education_exp1
order education_exp1, after(education_exp)
drop education_exp
rename education_exp1 education_exp
label var education_exp "Education expectation"


* ---- asset_min ----
tab asset_min

* ids 16322 and 39121 answered in FMG instead of Ariary: divide by 5
replace asset_min = asset_min/5 if inlist(id, "16322", "39121")
* id 01421: one zero too many was entered
replace asset_min = 15000000 if id=="01421"

* Derived versions: thousands of Ariary and natural log
gen asset_min_1000 = asset_min/1000
gen asset_min_ln   = ln(asset_min)

label var asset_min      "Asset min (Ariary)"
label var asset_min_1000 "Asset min (1000 Ariary)"
label var asset_min_ln   "Assets min (ln)"

format asset_min asset_min_1000 asset_min_ln %20.0g


* ---- asset_max ----
tab asset_max

* ids 16322 and 25322 answered in FMG instead of Ariary: divide by 5
replace asset_max = asset_max/5 if inlist(id, "16322", "25322")
* id 17121 gave no answer for asset_max, but it was entered as 0
replace asset_max = . if id=="17121"

* Derived versions: thousands of Ariary and natural log
gen asset_max_1000 = asset_max/1000
gen asset_max_ln   = ln(asset_max)

label var asset_max      "Asset max (Ariary)"
label var asset_max_1000 "Asset max (1000 Ariary)"
label var asset_max_ln   "Assets max (ln)"

format asset_max asset_max_1000 asset_max_ln %20.0g



*asset_goal
* Asset aspiration in Ariary. Individual entries are corrected first, then
* the derived versions (thousands, EUR, logs) are built from the corrected
* values.

tab asset_goal, m
sum asset_goal //very high standard deviation

*id 16322 one 0 too much was coded:
replace asset_goal = 10000000000000 if id=="16322"
*id 25322 reported FMG instead Ariary and wrong number coded:
replace asset_goal = 99000000000000/5 if id=="25322"
*id 39121 reported FMG instead Ariary and wrong number coded:
replace asset_goal = 45000000/5 if id=="39121"
*08122 a 0 was not coded: 
replace asset_goal = 80000000 if id == "08122"
*id 16101 not correctly coded --> different currencies used --> use 500dollars in Ariary:
replace asset_goal = 1817500 if id == "16101"
*id 05222 wrongly entered, a 0 is missing 
replace asset_goal = 3000000000/5 if id == "05222"
*id 04112 assume one 0 was forgotten for asset_goal:
replace asset_goal = 18000000 if id == "04112"
*10322 not correctly coded --> 100 billion for asset_goal:
replace asset_goal = 100000000000 if id == "10322"

label var asset_goal "Asset aspiration (Ariary)"

generate asset_goal_1000= asset_goal/1000
label var asset_goal_1000 "Asset aspiration (1000 Ariary)"

* Conversion to EUR with the fixed factor 0.00025 EUR per Ariary
generate asset_goal_EUR = asset_goal*0.00025
label var asset_goal_EUR "Asset aspiration (EUR)"


gen asset_goal_ln = ln(asset_goal)
label var asset_goal_ln "Assets aspiration (ln)"

* Log of the EUR amount. The earlier version multiplied the Ariary log by the
* exchange rate (asset_goal_ln*0.00025), which is not the log of the EUR
* value: ln(x*0.00025) equals ln(x) + ln(0.00025).
gen asset_goal_ln_EUR = ln(asset_goal_EUR)
label var asset_goal_ln_EUR "Asset aspiration (ln) in EUR"


format asset_goal asset_goal_1000 asset_goal_EUR asset_goal_ln %20.0g




*asset_exp
* Expected assets in Ariary: fix individual entries, then derive the
* thousands and log versions from the corrected values.

tab asset_exp, m

*id 25322 reported FMG instead Ariary and number wrongly coded:
replace asset_exp = 9000000000000/5 if id=="25322"
*id 39121 reported FMG instead Ariary:
replace asset_exp = asset_exp/5 if id=="39121"
*id 11111 all questions answered in FMG, but last one was coded as Ariary:
replace asset_exp = 125000000/5 if id== "11111"
*id 10322 not correctly coded --> 90 billion for asset_exp:
replace asset_exp = 90000000000 if id == "10322"

label var asset_exp "Asset expectation (Ariary)"

generate asset_exp_1000= asset_exp/1000
label var asset_exp_1000 "Asset expectation (1000 Ariary)"

* Log of the asset expectation itself. The earlier version took
* ln(income_goal) here, so the variable did not contain what its label says.
gen asset_exp_ln = ln(asset_exp)
label var asset_exp_ln "Asset expectation (ln)"

format asset_exp asset_exp_1000 asset_exp_ln %16.0g



*asset_achieved
* Current assets in Ariary: convert FMG answers, then derive the thousands,
* log and EUR versions.

tab asset_achieved, m

*id 16322 reported FMG instead Ariary:
replace asset_achieved = asset_achieved/5 if id=="16322"

*id 25322 reported FMG instead Ariary:
replace asset_achieved = asset_achieved/5 if id=="25322"

label var asset_achieved "Current assets (Ariary)"

generate asset_achieved_1000 = asset_achieved/1000
* The closing quote of this label was missing.
label var asset_achieved_1000 "Current assets (1000 Ariary)"

gen asset_achieved_ln = ln(asset_achieved)
label var asset_achieved_ln "Asset achieved (ln)"


* Conversion to EUR with the fixed factor 0.00025 EUR per Ariary
gen asset_achieved_EUR = asset_achieved*0.00025
label var asset_achieved_EUR "Asset achieved in EUR"


* ---- income_min ----
tab income_min

* id 04401: coding mistake, should be 10.000.000 Ariary (50.000.000 FMG / 5)
replace income_min = 50000000/5 if id == "04401"
* id 28101: the value is set to 200.000
* NOTE(review): the earlier comment read "min level is 2.000.000 instead of
* 200.000", which can be read either way -- confirm against the questionnaire.
replace income_min = 200000 if id == "28101"

* Derived versions: thousands of Ariary and natural log
gen income_min_1000 = income_min/1000
gen income_min_ln   = ln(income_min)

label var income_min      "Income min (Ariary)"
label var income_min_1000 "Income min (1000 Ariary)"
label var income_min_ln   "Incomes min (ln)"

format income_min income_min_1000 income_min_ln %20.0g


* ---- income_max ----
tab income_max

* id 16322 answered in FMG instead of Ariary: divide by 5
replace income_max = income_max/5 if id=="16322"
* id 21321: the maximum level is 10.000.000 instead of 1.000.000
replace income_max = 10000000 if id=="21321"

* Derived versions: thousands of Ariary and natural log
gen income_max_1000 = income_max/1000
gen income_max_ln   = ln(income_max)

label var income_max      "Income max (Ariary)"
label var income_max_1000 "Income max (1000 Ariary)"
label var income_max_ln   "Incomes max (ln)"

format income_max income_max_1000 income_max_ln %20.0g




*income_goal
* Income aspiration in Ariary: fix individual entries, then derive the
* thousands, EUR and log versions from the corrected values.

tab income_goal, m
sum income_goal 

*id 16322 reported FMG instead Ariary:
replace income_goal = income_goal/5 if id=="16322"
*id 02202 assume 14.000.000 in Ariary instead of FMG 
replace income_goal = 14000000 if id == "02202"
*39302 goal should be 800.000 Ar. instead of 500.000 Ar. 
replace income_goal = 800000 if id == "39302"

label var income_goal "Income aspiration (Ariary)"

generate income_goal_1000= income_goal/1000
label var income_goal_1000 "Income aspiration (1000 Ariary)"

* Conversion to EUR with the fixed factor 0.00025 EUR per Ariary
generate income_goal_EUR = income_goal*0.00025
label var income_goal_EUR "Income aspiration (EUR)"

gen income_goal_ln = ln(income_goal)
label var income_goal_ln "Income aspiration (ln)"

* Log of the EUR amount. The earlier version multiplied the Ariary log by the
* exchange rate (income_goal_ln*0.00025), which is not the log of the EUR
* value: ln(x*0.00025) equals ln(x) + ln(0.00025).
gen income_goal_ln_EUR = ln(income_goal_EUR)
label var income_goal_ln_EUR "Income aspiration (ln) in EUR"


describe income_goal_1000 //%9.0g
format income_goal income_goal_1000 income_goal_ln %16.0g
tab income_goal_1000, m




*income_exp
* Expected income in Ariary: fix individual entries, then derive the
* thousands and log versions from the corrected values.

tab income_exp, m
sum income_exp

*id 16322 reported FMG instead Ariary:
replace income_exp = income_exp/5 if id=="16322"
*id 10411 expectation is 10.000.000 Ar and should be 10.000 Ar. 
replace income_exp = 10000 if id == "10411"
*id 23302 expectation is 5.000.000 Ar and should be 1.000.000 Ar. 
* NOTE(review): the code used to assign 10000000000 here, which contradicts
* the documented correction above. The value now follows the comment; please
* confirm against the paper questionnaire.
replace income_exp = 1000000 if id == "23302"

label var income_exp "Income expectation (Ariary)"

generate income_exp_1000= income_exp/1000
label var income_exp_1000 "Income expectation (1000 Ariary)"

* Log of the income expectation itself. The earlier version took
* ln(income_goal) here, which made this variable a copy of income_goal_ln.
gen income_exp_ln = ln(income_exp)
label var income_exp_ln "Income expectation (ln)"

format income_exp income_exp_1000 income_exp_ln %16.0g

* ---- income_achieved ----

* id 16322 answered in FMG instead of Ariary: divide by 5
replace income_achieved = income_achieved/5 if id=="16322"

* Derived versions: thousands of Ariary, EUR (factor 0.00025) and natural log
gen income_achieved_1000 = income_achieved/1000
gen income_achieved_EUR  = income_achieved*0.00025
gen income_achieved_ln   = ln(income_achieved)

label var income_achieved      "Current income (Ariary)"
label var income_achieved_1000 "Current income (1000 Ariary)"
label var income_achieved_EUR  "Current income (EUR)"
label var income_achieved_ln   "Current income (ln)"

* Social status items: inspect the distributions and set short labels.

*status_min
tab status_min,m 



*status_max
tab status_max, m
label var status_max "Status max"

*status_goal
tab status_goal //higher levels than 10 are also allowed
describe status_goal
* The label text was cut off ("Status aspirat) and had no closing quote. It is
* completed in line with the other aspiration labels in this file.
label var status_goal "Status aspiration"

*status_exp
tab status_exp





**total_weight scheme
* The four dimension weights are meant to sum to 20. Rows whose recorded
* total differs from 20 are rescaled proportionally, rounded to one decimal,
* and the total is then rebuilt from the rescaled weights.

tab total_weight 
* A total of 0 is treated as missing.
replace total_weight = . if total_weight == 0
* Show the rows that will be rescaled. list is used instead of browse so the
* do-file also runs without the interactive Data Editor window.
list total_weight weight_edu weight_inc weight_social weight_assets if total_weight !=20


* Rescale each weight to a base of 20 where the recorded total is not 20.
* A missing total also satisfies the condition, so the weights of such rows
* become missing -- the same outcome as the former "<20 | >20" test.
foreach w of varlist weight_edu weight_inc weight_social weight_assets {
	replace `w' = ((`w'/total_weight)*20) if total_weight != 20
	replace `w' = round(`w', 0.1)
}

* Rebuild the total from the rescaled weights. After rounding it can differ
* slightly from 20. The new variable takes the place of the old one.
gen total_weight1=weight_edu+weight_inc+weight_social+weight_assets
order total_weight1, after(total_weight)
drop total_weight
rename total_weight1 total_weight



* Store the cleaned experiment data.
save "${Directory}Data/processed/schoolexp_cleaned.dta", replace




********************************************************************************
********************************************************************************
********************************************************************************




/**** Cleaning and Transforming Data from the Questionnaire*****/


* Load the raw questionnaire data. This replaces the data in memory together
* with all value labels defined for the experiment data, so every value label
* used below has to be defined again.
use "${Directory}Data/raw/schoolques.dta", clear


*Rename and Label Variables
label define myes 0 "No" 1 "Yes"

label define mjob 1 "Public" 2 "Private"

label define mvideo 1 "Woman video" 2 "Man video" 0 "Placebo video", replace

label define magree 1 "strongly disagree" 2 "disagree" 3 "neither agree nor disagree" ///
4 "agree" 5 "strongly agree" 

label define mfemale 0 "Male" 1 "Female"

* msession is attached to session below but was never defined in this section,
* which left the variable pointing at a label with no contents. Define it
* unless the raw file already ships one.
* NOTE(review): assumes session uses the same 1/2 coding as the experiment data.
capture label list msession
if _rc {
	label define msession 1 "morning session" 2 "afternoon session"
}

rename School city
label variable city "Location of School"

rename Treatment treatment
label variable treatment "Treatment"

rename Subtreatment subtreatment
label variable subtreatment "Subtreatment"

rename Session session
label variable session "Session"
label values session msession

rename Participant player
label variable player "Player"

* The stray leading blank in the Antalaha label text was removed.
label define mschool 1 "Andapa" 2 "Sambava" 3 "Antalaha" 4 "Vohemar"
label values city mschool

* Video arm 3 is recoded to 0, as in the experiment data.
recode treatment (3=0)
label values treatment mvideo

label values subtreatment mjob

** Difficulties during the workshop (yes/no)
rename Didyouhavedifficultiesin difficulties_workshop
label variable difficulties_workshop "Difficulties Workshop"
label values difficulties_workshop myes

** Which activity caused difficulties
* The free-text answer is encoded, the codes are collapsed into activity
* categories (missing counts as no difficulties), and the numeric version
* replaces the string under the same name.
rename Ifyeswhichone difficulties_activity
encode difficulties_activity, gen(difficulties_activity1)
recode difficulties_activity1 (1=0) (11 12=1) (3 4=2) (6 7 8 9 10=3) (2 5=5) (13=6) (.=0)
label drop difficulties_activity1
label define mdifficulties 0 "No difficulties" 1 "Triangle" 2 "Circle" 3 "Rhombus" ///
	4 "Carree" 5 "All" 6 "Video"
label values difficulties_activity1 mdifficulties
drop difficulties_activity
rename difficulties_activity1 difficulties_activity
label variable difficulties_activity "Difficulties Activity"
* Placed before the next question while that question still has its raw name
order difficulties_activity, before(Didyouunderstandtheinstru)

** Understood the instructions (yes/no)
rename Didyouunderstandtheinstru understand_instructions
label variable understand_instructions "Understand Instructions"
label values understand_instructions myes

** Tiredness
rename Didyougetexhaustedastime tiredness
label variable tiredness "Tiredness"

**Which Track
* The study track arrives as a string. It is encoded, code 1 is set to missing
* and codes 2 to 5 are shifted down to 1 to 4, then the numeric version takes
* over the name level_quest.

rename Whichtrackareyoustudying level_quest
label variable level_quest "Level of Study"
encode level_quest, gen(level1)
recode level1 (1=.)(2=1) (3=2) (4=3) (5=4)
label drop level1
* mlevel was only defined for the experiment data, which is no longer in
* memory, so the label attached below had no contents. Define it here unless
* the raw file already ships one.
* NOTE(review): assumes codes 1 to 4 stand for A1, A2, C and D as in the
* experiment data -- confirm.
capture label list mlevel
if _rc {
	label define mlevel 1 "A1" 2 "A2" 3 "C" 4 "D"
}
label val level1 mlevel
drop level_quest
rename level1 level_quest
order level_quest, before(Whichoccupationdoyouwant)

**Creating science and language focus variable
* The conditions used to refer to "level", which only worked because Stata
* expanded the abbreviation to level_quest. The full name is used now so the
* code does not depend on variable abbreviation. Every dummy is defined for
* codes 1 to 4 only and is missing otherwise.

* Broad focus: science = codes 3 and 4, language = codes 1 and 2
gen science_focus  = inlist(level_quest, 3, 4) if inlist(level_quest, 1, 2, 3, 4)
gen language_focus = inlist(level_quest, 1, 2) if inlist(level_quest, 1, 2, 3, 4)
order language_focus science_focus, after(level_quest)
label variable science_focus "Science focus"
label variable language_focus "Language focus"

* One dummy per single track
gen science_focus_high  = (level_quest == 3) if inlist(level_quest, 1, 2, 3, 4)
label variable science_focus_high "Science focus high"

gen science_focus_low   = (level_quest == 4) if inlist(level_quest, 1, 2, 3, 4)
label variable science_focus_low "Science focus low"

gen language_focus_high = (level_quest == 1) if inlist(level_quest, 1, 2, 3, 4)
label variable language_focus_high "Language focus high"

gen language_focus_low  = (level_quest == 2) if inlist(level_quest, 1, 2, 3, 4)
label variable language_focus_low "Language focus low"

*** Occupation, likelihood items, parental education and marriage age
* Raw question names in the first list, analysis names in the second
rename (Whichoccupationdoyouwant Inhowmanyyearsdoyouexpe Howlikelyisitthatyouwil ///
        Howlikelyisitthatyougo Howlikelyisitthatdestiny Whatisthehighestschoold ///
        Q Atwhichagewouldyoulike) ///
       (occupation_desired expected_years_occ occupation_likelihood ///
        university_likelihood badluck_likelihood education_mother ///
        education_father marriage_age)

* Desired occupation and years needed to reach it
label variable occupation_desired "Desired Occupation"
label variable expected_years_occ "Years to achieve desired occupation"

* Likelihood items share one five-point scale
label define mlikely 1 "Very unlikely" 2 "Unlikely" 3 "50% Likely" 4 "Likely" 5 "Very Likely"
label variable occupation_likelihood "Likelihood to achieve desired occupation"
label variable university_likelihood "Likelihood to attend university"
label variable badluck_likelihood "Likelihood bad luck"
label values occupation_likelihood university_likelihood badluck_likelihood mlikely

* Parental education
label define meducation 0 "No education" 1 "Primary education" 2 "Secondary education" ///
	3 "University Degree"
label variable education_mother "Highest school degree mother"
label variable education_father "Highest school degree father"
label values education_mother education_father meducation

* Missing mother's education is filled with a fixed value
* NOTE(review): 1.623955 looks like the sample mean printed by the sum command
* just above -- confirm, and refresh it if the raw data change.
tab education_mother, m
sum education_mother
replace education_mother = 1.623955 if education_mother == .

* Desired marriage age: answers below 17 are set to missing
label variable marriage_age "Desired age to get married"
replace marriage_age = . if marriage_age < 17 //values 1,3,4 and 7 do not make sense


**Meeting outsider variable
* Frequency scale shared by the outsider and smartphone questions
label define moften 1 "never" 2 "once a year" 3 "once a month" 4 "once a week" ///
5 "every day"


rename Howoftendoyoumeetother meeting_outsiders
label variable meeting_outsiders "Frequency meeting outsiders"
* The full variable name is used here. The earlier "meeting_outsider" only
* worked through variable abbreviation.
label val meeting_outsiders moften

**Using smartphone

rename Howoftendoyouuseasmart smartphone_use
label variable smartphone_use "Frequency smartphone usage"
label val smartphone_use moften

**Having rolemodel

rename Doyouhaveapersonyouloo role_model
label variable role_model "Role model exist"
label val role_model myes

**which gender role model

rename V gender_model
label variable gender_model "Gender role model"
label val gender_model mfemale

**Location Role model***

rename W location_model
label variable location_model "Location role model"

*** Gender beliefs, competition preference and risk attitude
* Raw question names in the first list, analysis names in the second
rename (Tofollowacareerismo Y Menaretypicallyperfor Menarebetteratcountin ///
        Menbelievethatwomena AC Areyougenerallywillingto) ///
       (belief_career belief_women_achievers belief_men_performance belief_men_better ///
        belief_women_ability competition_pref risk_aversion_sub)

* The five belief statements are answered on the agreement scale
label variable belief_career          "Career is typical for men"
label variable belief_women_achievers "Women can achieve career goals"
label variable belief_men_performance "Men performe better in competitive tasks"
label variable belief_men_better      "Men are better in task"
label variable belief_women_ability   "Beliefs men on women's task ability"
label values belief_career belief_women_achievers belief_men_performance ///
	belief_men_better belief_women_ability magree

* Competition preference and self-reported risk attitude (no value labels)
label variable competition_pref  "Competitive preferences"
label variable risk_aversion_sub "Risk preferences subjective"

*** Time preferences 1-3: choice between today (1) and in one year (0)
label define mpatience 1 "today" 0 "in one year"

rename (AE AF AG) (time_preferences_1 time_preferences_2 time_preferences_3)
forvalues k = 1/3 {
	label variable time_preferences_`k' "Impatience level `k'"
	label values time_preferences_`k' mpatience
}

*** Loss aversion 1-6: each item is accepted (1) or rejected (0)
label define maccept 1 "accept" 0 "reject"

rename (AH AI AJ AK AL AM) ///
       (loss_aversion_obj_1 loss_aversion_obj_2 loss_aversion_obj_3 ///
        loss_aversion_obj_4 loss_aversion_obj_5 loss_aversion_obj_6)
forvalues k = 1/6 {
	label variable loss_aversion_obj_`k' "Loss aversion_objective `k'"
	label values loss_aversion_obj_`k' maccept
}

**Confidence items by school subject (raw columns AN-AT)
* All seven columns are renamed in one group rename, then labelled.

rename (AN AO AP AQ AR AS AT) ///
	(confidence_math confidence_biology confidence_language confidence_computer ///
	 confidence_science confidence_foreign confidence_social)

label variable confidence_math     "Confidence Math"
label variable confidence_biology  "Confidence Biology"
label variable confidence_language "Confidence language"
label variable confidence_computer "Confidence computer"
label variable confidence_science  "Confidence Science"
label variable confidence_foreign  "Confidence foreign language"
label variable confidence_social   "Confidence Social studies"

***General Self-efficacy Scale: ten items stored in raw columns AU-BD
* Rename all ten columns at once, then attach the variable labels.

rename (AU AV AW AX AY AZ BA BB BC BD) ///
	(self_solve_difficulties self_overcome_obstacles self_achiever ///
	 self_unexpected_event self_resourceful self_effort_solving ///
	 self_calm self_think_solution self_think_solution2 self_handle)

label variable self_solve_difficulties "Ability to solve problems"
label variable self_overcome_obstacles "Ability to overcome obstacles"
label variable self_achiever           "Ability to achieve"
label variable self_unexpected_event   "Ability to deal with unexpected event"
label variable self_resourceful        "Resourceful"
label variable self_effort_solving     "Effort to solve problems"
label variable self_calm               "Calm in difficulties"
label variable self_think_solution     "Think a solution"
label variable self_think_solution2    "Think a solution2"
label variable self_handle             "Handle whatever comes"


***Locus of control statements 1-6 (raw columns BE-BJ)
* A value label for the four answer categories was drafted but is not applied:
*label define mstatement 1"I agree much more with A" 2 "I agree slightly more with A"  ///
*3 "I agree slightly more with B" 4 "I agree much more with B"

local item = 0
foreach raw in BE BF BG BH BI BJ {
	local ++item
	rename `raw' locus_statement`item'
	label variable locus_statement`item' "Locus of control statement `item'"
	*label val locus_statement`item' mstatement
}

***Friends group

rename Withhowmanypeopleinyour friends
label variable friends "Friends group"
codebook friends

***Year of birth

rename Inwhichyearwereyouborn year
label variable year "Year born"
codebook year, tab(11)

***Where the participant was raised (0 = rural area, 1 = big city)
rename Wherehaveyoubeenraised city_raised
label define mcity 0 "Rural area" 1 "big city"
label variable city_raised "Raised in city"
label values city_raised mcity

**Gender (value label mfemale is defined outside this section)
rename Whatisyourgender1Vav female
label variable female "Female"
label values female mfemale

* Male dummy: 1 only where female is exactly 0; every other case,
* including a missing female, is coded 0.
generate male = (female == 0)
label define mmale 0 "Female" 1 "Male"
label variable male "Male"
label values male mmale


**Household, economic status, job and market items
* Step 1: give the raw survey columns their analysis names.
rename (Howmanybrothersdoyouhav Howmanysistersdoyouhave) (no_brothers no_sisters)
rename Pleaseindicateyoureconomi economic_status
rename Doyouhaveajob             job_present
rename IFYESHowmanyhoursawee     hours_work
rename Doesyourfamilycultivatev  vanilla_family
rename Howoftendoyougotothem     market_often

* Step 2: variable labels.
label variable no_brothers     "Number of brothers"
label variable no_sisters      "Number of sisters"
label variable economic_status "Economic status"
label variable job_present     "Job at present"
label variable hours_work      "Hours per week"
label variable vanilla_family  "Vanilla family"
label variable market_often    "Market visit"

* Step 3: value labels. no_sisters has its value label detached; myes and
* moften are defined outside this section.
label values no_sisters .
label define economic 1 "Poor" 2 "Rather poor" 3 "Neither poor/wealthy" 4 "Rather wealthy" 5 "Wealthy"
label values economic_status economic
label values job_present myes
label values vanilla_family myes
label values market_often moften


**** Unique participant ID
* Concatenate player, city, treatment and subtreatment, read the result as a
* number and write it back as a string zero-padded to five digits.
generate id = string(real(string(player) + string(city) + string(treatment) + string(subtreatment)), "%05.0f")

order id, after(city)



* Desired occupation: the free-text answer is encoded and then collapsed twice:
*   (a) occupation_desired_Nguyen - three sectors
*   (b) occupation_desired        - four levels of education needed
* The numeric codes on the left of each recode rule are the codes encode assigned
* to the distinct answers, so both tables are only valid for this exact raw file.
codebook occupation_desired, tab(90)

encode occupation_desired, gen(occupation_desired1)

codebook occupation_desired1, tab(90)



***recode and label define occupation by sector (Nguyen classification)

recode occupation_desired1 (1=.)(2=3) (3=3) (4=3) (5=1) (6=1) (7=1) (8=3) (9=3) ///
(10=3) (11=2) (12=1) (13=3) (14=2) (15=2) (16=3) (17=2) (18=3) (19=2) (20=3) ///
(21=3) (22=3) (23=3) (24=3) (25=3) (26=3) (27=3) (28=3) (29=3) (30=1) (31=2) (32=3) ///
(33=3) (34=3) (35=3) (36=3) (37=3) (38=3) (39=3) (40=3) (41=2) (42=3) (43=2) (44=3) ///
(45=3) (46=3) (47=3) (48=3) (49=3) (50=2) (51=2) (52=2) (53=2) (54=3) (55=2) (56=3) ///
(57=2) (58=2) (59=2) (60=2) (61=2) (62=2) (63=3) (64=3) (65=2) (66=2) (67=2) (68=2)  ///
(69=3) (70=3) (71=2) (72=2) (73=2) (74=2) (75=2) (76=3) (77=2) (78=3) (79=2) (80=3)  ///
(81=3) (82=3) (83=2) (84=3) (85=2) (86=2) (87=2) (88=3) (89=2) (90=2)

* The value label created by encode no longer matches the collapsed codes.
label drop occupation_desired1

label define moccupation 1 "Primary Activities" 2 "Public Sector" 3 "Commerce/Private Sector"

label val occupation_desired1 moccupation
rename occupation_desired1 occupation_desired_Nguyen
order occupation_desired_Nguyen, after(occupation_desired)

***recode for 4 levels of education needed to work in desired occupation

encode occupation_desired, gen(occupation2)

recode occupation2 (1=.)(2=3) (3=3) (4=3) (5=4) (6=4) (7=4) (8=3) (9=3) ///
(10=3) (11=3) (12=2) (13=2) (14=4) (15=4) (16=4) (17=4) (18=4) (19=4) (20=4) ///
(21=4) (22=2) (23=4) (24=4) (25=2) (26=3) (27=4) (28=2) (29=4) (30=1) (31=2) (32=1) ///
(33=1) (34=1) (35=1) (36=3) (37=4) (38=4) (39=4) (40=4) (41=4) (42=4) (43=4) (44=4) ///
(45=4) (46=3) (47=3) (48=1) (49=1) (50=1) (51=4) (52=4) (53=4) (54=3) (55=1) (56=3) ///
(57=3) (58=2) (59=4) (60=4) (61=4) (62=4) (63=3) (64=3) (65=2) (66=2) (67=4) (68=4)  ///
(69=3) (70=3) (71=2) (72=4) (73=4) (74=4) (75=4) (76=2) (77=4) (78=2) (79=3) (80=3)  ///
(81=1) (82=1) (83=4) (84=1) (85=3) (86=4) (87=3) (88=3) (89=2) (90=4)

label drop occupation2

label define moccupationlevel 1 "No Primary Education" 2 "Highschool Diploma" ///
3 "Secondary Education" 4 "Academical Level"

label val occupation2 moccupationlevel

codebook occupation2

* Replace the string answer by its education-level coding.
drop occupation_desired
rename occupation2 occupation_desired
codebook occupation_desired
order occupation_desired, before(occupation_desired_Nguyen)

* Variable labels. The education label belongs to occupation_desired (the variable
* carrying the moccupationlevel categories); it used to be attached to the sector
* variable occupation_desired_Nguyen by mistake.
label variable occupation_desired "Education needed for desired occupation"
label variable occupation_desired_Nguyen "Sector of desired occupation (Nguyen)"


***model_location recoding
* Encodes the free-text location of the role model and collapses the resulting
* codes into 1 "Close", 2 "Region", 3 "Madagascar", 4 "Outside"; participants
* with role_model == 0 are set to 0 "No Role Model". The numeric codes on the
* left of each recode rule are the ones encode assigned, so the table is only
* valid for this exact raw file.


encode location_model, gen(location_model1)
codebook location_model1
br location_model1 role_model


* NOTE(review): encode numbers categories from 1 upwards, so the rule (0=.) does
* not appear to match anything and code 1 is left unchanged (i.e. ends up as
* "Close") - confirm whether (1=.) was intended, as in the occupation recodes.
recode location_model1 (0=.) (2=4) (3=1) (4=1) (5=3) (6=3) (7=2) (8=2) (9=1) ///
(10=3) (11=3) (12=2) (13=4) (14=4) (15=4) (16=4) (17=4) (18=4) ///
(19=2) (20=2) (21=2) (22=4) (23=3) (24=3) (25=4) (26=4) (27=4) ///
(28=4) (29=4) (30=4) (31=4) (32=4) (33=4) (34=3) (35=4) (36=2) ///
(37=2) (38=3) (39=2) (40=2) (41=.) (42=4) (43=3) (44=2) (45=4) ///
(46=4) (47=4) (48=.) (49=4) (50=1) (51=4) (52=1) (53=3) (54=3) (55=4) ///
(56=1) (57=1) (58=4)

codebook location_model1
* Participants without a role model get their own category, overriding the recode.
replace location_model1 = 0 if role_model == 0

* The new value label replaces the one created by encode.
label define mlocation 1 "Close" 2 "Region" 3 "Madagascar" 4 "Outside" 0 "No Role Model"
label val location_model1 mlocation

codebook location_model1
* Swap the string variable for its coded version under the original name.
order location_model1,after (location_model)
drop location_model
rename location_model1 location_model





*********************************************************************************************************************************

**MERGING DATASETS
* Attach the cleaned experiment data to the survey data, one row per id.
* The merge indicator is not kept.

merge 1:1 id using "${Directory}Data/processed/schoolexp_cleaned.dta", nogenerate



***********************************************************************************************************************************

**ADDITIONAL CODING:


drop Comment

label define mprivate 1 "public" 2 "private"
label values subtreatment mprivate



***Age in years, derived from the year of birth (reference year 2018).
* Participants without a year of birth are assigned the hard-coded value 19.10.
generate age = 2018 - year
order age, after(year)
summarize age
replace age = 19.10 if age == .
label variable age "Age in years"


***Fill in the three missing values of the gender dummy by hand
* (according to the original note, gender was determined by age).

replace female = 0 if id == "04102"

replace female = 1 if id == "14101"

replace female = 1 if id == "39101"

* Keep the male dummy in sync with female in BOTH directions. male is coded 0
* wherever female was missing when it was generated, so a participant now set to
* female == 0 (id 04102) would otherwise end up with female == 0 and male == 0.
replace male = 0 if female == 1
replace male = 1 if female == 0



* Generate a group id for assessing who won in the group.
* The id concatenates group, city, treatment and session as strings.
gen id_group= string(group) + string(city) +string(treatment)+string(session)
label var id_group "Unique identifier for the group" 

sort id_group


*CONSISTENCY CHECK


***TASK 3: how many winners are there in every group:
tab id_group winner_3



// For those groups in which a winner was not randomly chosen, create a random variable.
// NOTE(review): runiform() assigns draws in observation order and `sort id_group`
// leaves the order within a group unspecified, so the drawn winners may depend on
// the incoming row order - confirm reproducibility (e.g. by sorting on id_group id).
set seed 12345
generate random = runiform()

// Highest random draw within each group (computed for every group; only the
// groups listed below make use of it).
egen max_random = max(random), by(id_group)


*ANDAPA
// Flag the member with the highest draw as winner in the groups where no winner was chosen randomly:
replace winner_3 = 1 if random == max_random  & id_group == "2101"
replace winner_3 = 1 if random == max_random  & id_group == "3101"
replace winner_3 = 1 if random == max_random  & id_group == "5101"
replace winner_3 = 1 if random == max_random  & id_group == "9101"


*SAMBAVA
// Same procedure as above
replace winner_3 = 1 if random == max_random  & id_group == "3201"
replace winner_3 = 1 if random == max_random  & id_group == "4211"



tab id_group winner_3

br id_group winner_3 treatment session city player id
* For group 3122: player 9 in Andapa (man video, morning session) was alone in his group --> drop observation
drop if id == "09122"
* For group 3421: there are only two members.
* Count the members per group; every group with exactly two members is dropped below.
bysort id_group: egen no_competitors=count(id)


br id id_group female if no_competitors == 2 
drop if no_competitors == 2

* Drop observations that state they did not understand the instructions
drop if understand_instructions == 0 

* Recount the group sizes after the drops above
bysort id_group: egen no_competitors_new=count(id)
tab no_competitors_new





**Gender balance

* Number of women per group, then a flag for every group that does not contain
* exactly two women.
bysort id_group: egen no_female = total(female)

br female id_group no_female

generate gender_imb = (no_female != 2)
label variable gender_imb "Gender imbalance group"



 
***NEW VARIABLES:

* Application dummies: 1 = completed application for the position, 0 otherwise
* (position_job == 0 is the assistant position, == 1 the coordinator position).
generate applied_assistant = (complete_app == 1 & position_job == 0)
label define assistant 0 "No application" 1 "Applied to assistant position"
label values applied_assistant assistant
label variable applied_assistant "Assistant position"

generate applied_coordinator = (complete_app == 1 & position_job == 1)
label define coordinator 0 "No application" 1 "Applied to coordinator position"
label values applied_coordinator coordinator
label variable applied_coordinator "Coordinator position"

* Treatment dummies: treatment 1 = woman video, 2 = man video, 0 = placebo video
generate woman_video = (treatment == 1)
label variable woman_video "Woman video"

generate man_video = (treatment == 2)
label variable man_video "Man video"

generate placebo_video = (treatment == 0)
label variable placebo_video "Placebo video"

 

* Performance change from task 1 to task 2, used to control for performance
* improvements (learning effects).
generate perf_diff21 = ptask_2 - ptask_1
label variable perf_diff21 "Performance difference 2-1"

* Total number of siblings
generate no_siblings = no_brothers + no_sisters
label variable no_siblings "Number of siblings"

* Mean performance over tasks 1, 2, 3 and 5
generate average_perf = (ptask_1 + ptask_2 + ptask_3 + ptask_5)/4
label variable average_perf "Average performance"

* One dummy per school location (city1 ... city4)
forvalues c = 1/4 {
	generate city`c' = (city == `c')
}




* Session-by-treatment identifier. Codes 1-15 are handed out consecutively over
* the (city, session) cells listed below, with treatment 0, 1, 2 inside each
* cell; any other combination keeps the value 0.
generate treat_sess = 0

local code = 0
foreach cell in "1 1" "1 2" "2 1" "3 1" "4 1" {
	tokenize `cell'
	forvalues arm = 0/2 {
		local ++code
		replace treat_sess = `code' if city == `1' & session == `2' & treatment == `arm'
	}
}

br treat_sess city session treatment

* Dummies equal to 1 when the participant answered 4 or 5 on the
* identification / liking item for the video character, 0 otherwise.
generate identify = inlist(identify_character, 4, 5)

generate like = inlist(like_character, 4, 5)

* Stereotype dummy: 1 for answers 4 or 5, missing for the middle answer 3,
* 0 for everything else.
generate high_stereo_threat = inlist(belief_men_performance, 4, 5)
replace high_stereo_threat = . if belief_men_performance == 3

label variable high_stereo_threat "Stereotype"


* Fraction of males in each session-by-treatment cell
bysort treat_sess: egen count_female = total(female == 1)
by treat_sess: egen count_male = total(female == 0)

generate total_no_sess = count_male + count_female
generate fra_male_sess = count_male/total_no_sess
label variable fra_male_sess "Fraction of males"



***Stated preference for competition
* Item: "How do you like to be in competition with somebody else?"
* 1 = I don't like this ... 5 = I like it very much
tabulate competition_pref



***Risk aversion

* Item taken from https://www.diw.de/de/soep:
* "Are you generally willing to take risks, or do you try to avoid risks?"
* 1 = not willing to take a risk ... 10 = fully prepared to take risks

tabulate risk_aversion_sub
summarize risk_aversion_sub // mean is very high: 8.3


***Loss aversion:

* Values taken from Gächter, S., Johnson, E. J., & Herrmann, A. (2010).
* Individual-level loss aversion in riskless and risky choices.
*
* Formula: lambda_risky = omega * (3000^alpha / L^beta), omega = w+(0.5) / w-(0.5)
* The coded values below equal 3000/L for L = 1000, 1500, ..., 3500
* (3000/3500 = 0.857) - presumably the losses used in the six lotteries;
* confirm against the questionnaire.
forvalues j = 1/6 {
	tab loss_aversion_obj_`j', m
}


* Implied loss-aversion coefficient for participants who switch at most once
* from accepting to rejecting:
*   reject all -> 4 | accept 1 -> 3 | accept 1-2 -> 2 | accept 1-3 -> 1.5
*   accept 1-4 -> 1.2 | accept 1-5 -> 1.0 | accept all -> 0.858
* Higher values mean stronger aversion to losses. The accept-all category used to
* be coded with a negative sign (-0.858); a loss-aversion coefficient cannot be
* negative and the sequence above continues to 0.858, so the sign was a typo.
* NOTE(review): any other answer pattern (including missing items) keeps the
* initial value 0 unless its id is listed below - confirm that the id lists are
* complete.
generate loss_aversion = 0
local n_accept = 0
foreach lambda in 4 3 2 1.5 1.2 1.0 0.858 {
	* Build the condition "items 1..n_accept accepted, all later items rejected".
	local pattern "1"
	forvalues j = 1/6 {
		local accepted = (`j' <= `n_accept')
		local pattern "`pattern' & loss_aversion_obj_`j' == `accepted'"
	}
	replace loss_aversion = `lambda' if `pattern'
	local ++n_accept
}

* Check (manually) whether participants only switch once:
br id loss_aversion_obj_1 loss_aversion_obj_2 loss_aversion_obj_3 loss_aversion_obj_4 loss_aversion_obj_5 loss_aversion_obj_6

* Participants identified in the manual check: their coefficient is set to
* missing. The two id lists are kept as in the original script (some ids appear
* in both, which is harmless).
local flagged_first 01122 03111 03122 05122 07102 07121 08122 09111 09112 09121 ///
	10222 11121 14222 17222 18222 21222 22222 23101 23222 24101 24222 25121 ///
	27111 28101 28111 29121 30111 32121 34121 35121 40121

local flagged_second 01121 04401 06102 06122 07121 08122 09122 10202 10301 11102 ///
	12102 14121 14222 15222 16111 17222 19111 23101 24202 25302 29121 30321 ///
	38101 38111

foreach flagged_id of local flagged_first {
	replace loss_aversion = . if id == "`flagged_id'"
}

foreach flagged_id of local flagged_second {
	replace loss_aversion = . if id == "`flagged_id'"
}




***Self-Efficacy for Academic Achievement (Bandura, 2006)

/*
Taken from:
Bandura, A. (2006). Guide for constructing self-efficacy scales.
In F. Pajares & T. C. Urdan (Eds.), Self-efficacy beliefs of adolescents (Vol. 5, pp. 307-337). IAP - Information Age Pub., Incorporated.
*/

foreach subject in math biology language computer science foreign social {
	tabulate confidence_`subject', missing
}


* Aggregated measure: plain sum of the seven subject items. The sum is missing
* as soon as one item is missing (23 missing values in the original run).
generate sefficacy_academic = confidence_math + confidence_biology + confidence_language ///
	+ confidence_computer + confidence_science + confidence_foreign + confidence_social

label variable sefficacy_academic "Academic self-efficacy"


* Missing scores are replaced by the mean of the participant's treatment arm.
* The means are hard-coded from the original run, in the order treatment 0, 1, 2.
local arm = 0
foreach arm_mean in 40.97248 45.47009 43.04505 {
	summarize sefficacy_academic if treatment == `arm'
	replace sefficacy_academic = `arm_mean' if sefficacy_academic == . & treatment == `arm'
	local ++arm
}


tabulate sefficacy_academic, missing


***General Self-Efficacy Scale (GSE)


/*from:
http://userpage.fu-berlin.de/~health/engscal.htm :
"Scoring: Responses are made on a 4-point scale. Sum up the responses to all 10 items to yield the final composite score with a range from 10 to 40. No recoding."
used in Haushofer, John and Orkin (2018) What Motivates Health Behavior
*/

* Item 1: I can always manage to solve difficult problems if I try hard enough.
* The answer code 7 is set to missing.
tabulate self_solve_difficulties, missing
replace self_solve_difficulties = . if self_solve_difficulties == 7

* Item 2: If someone opposes me, I can find the means and ways to get what I want.
tabulate self_overcome_obstacles, missing
* Item 3: It is easy for me to stick to my aims and accomplish my goals.
tabulate self_achiever, missing
* Item 4: I am confident that I could deal with unexpected events.
tabulate self_unexpected_event, missing
* Item 5: Thanks to my resourcefulness, I know how to handle unforeseen situations.
tabulate self_resourceful, missing
* Item 6: I can solve most problems if I invest the necessary effort.
tabulate self_effort_solving, missing
* Item 7: I can remain calm when facing difficulties because I can rely on my abilities to reduce stress.
tabulate self_calm, missing
* Item 8: When I have problems I usually think of a solution.
tabulate self_think_solution, missing
* Item 9: If I am in trouble, I can usually think of a solution.
tabulate self_think_solution2, missing
* Item 10: I can usually handle whatever comes on my way.
tabulate self_handle, missing


* Composite score: sum of the ten items; missing as soon as one item is missing
* (23 missing values in the original run).
generate general_sefficacy = self_solve_difficulties + self_overcome_obstacles + self_achiever ///
	+ self_unexpected_event + self_resourceful + self_effort_solving + self_calm ///
	+ self_think_solution + self_think_solution2 + self_handle
label variable general_sefficacy "General self-efficacy"


* Missing scores are replaced by the mean of the participant's treatment arm.
* The means are hard-coded from the original run, in the order treatment 0, 1, 2.
local arm = 0
foreach arm_mean in 36.73554 37.64912 38.60784 {
	summarize general_sefficacy if treatment == `arm'
	replace general_sefficacy = `arm_mean' if general_sefficacy == . & treatment == `arm'
	local ++arm
}



***Generalized locus of control measure

/*
(A subset of) the questions were taken from:
Rotter, J. B. (1966). Generalized expectancies for internal versus external control of reinforcement.
 Psychological Monographs: General and Applied, 80(1), 1-28.

The following reference used a similar indicator (however only 2 distinct questions) for their measure of locus of control:
Bernard, T., Dercon, S., & Taffesse, A. S. (2012).
Beyond Fatalism: An Empirical Exploration of Self-Efficacy and Aspirations Failure in Ethiopia (p. 20).

Our score is the total number of internal choices (Rotter uses the total number of external choices).
*/

* Internal answer per statement: B for statements 1 and 5, A for statements 2, 3, 4 and 6.
tabulate locus_statement1
replace locus_statement1 = . if locus_statement1 == 5   // answer code 5 is set to missing
forvalues s = 2/6 {
	tabulate locus_statement`s'
}

* Answer scale: 1 = agree much more with A ... 4 = agree much more with B.
* Each statement contributes 1 when the answer is on the internal side
* (3 or 4 where B is internal, 1 or 2 where A is internal); missing answers contribute 0.
generate locus_control = inlist(locus_statement1, 3, 4) ///
	+ inlist(locus_statement2, 1, 2) ///
	+ inlist(locus_statement3, 1, 2) ///
	+ inlist(locus_statement4, 1, 2) ///
	+ inlist(locus_statement5, 3, 4) ///
	+ inlist(locus_statement6, 1, 2)

br locus_statement1 locus_statement2 locus_statement3 locus_statement4 locus_statement5 locus_statement6 locus_control
label variable locus_control "Locus of control"

tabulate locus_control, missing




***Locus of control with respect to the desired occupation
* Question 9: How likely is it that destiny, good or bad luck or other people
* affect your chances to work in your desired occupation?
* 1 = Very unlikely, 5 = Very likely
codebook badluck_likelihood
tabulate badluck_likelihood
histogram badluck_likelihood




***Time preferences variable
* patience runs from 1 to 4 and is built from the three time-preference items:
*   1 = item 1 answered 1 ("today")
*   2 = item 1 answered 0, item 2 answered 1 (item 3 is ignored)
*   3 = items 1 and 2 answered 0, item 3 answered 1 or missing
*   4 = all three items answered 0
* Any other pattern stays missing.
generate patience = .
replace patience = 1 if time_preferences_1 == 1
replace patience = 2 if time_preferences_1 == 0 & time_preferences_2 == 1
replace patience = 3 if time_preferences_1 == 0 & time_preferences_2 == 0 ///
	& (time_preferences_3 == 1 | time_preferences_3 == .)
replace patience = 4 if time_preferences_1 == 0 & time_preferences_2 == 0 ///
	& time_preferences_3 == 0




*** Students who were not interested but applied to a position (they were not interviewed)
* complete_app_new starts as a copy of complete_app and is then switched to 1
* for the ids listed below.

generate complete_app_new = complete_app
label variable complete_app_new "Complete application all"

foreach applicant in 05411 13212 17212 03202 02401 10101 25101 {
	replace complete_app_new = 1 if id == "`applicant'"
}




*** Role models
* Manual corrections: these participants are coded as having a role model.

foreach participant in 35101 02112 04112 04122 {
	replace role_model = 1 if id == "`participant'"
}


********************************************************************************


* Aspiration Index
*
* Step 1: standardize each aspiration dimension with the mean and standard
* deviation of the placebo group (treatment == 0):
*     x_stand = (x - placebo mean of x) / placebo sd of x
*
* The placebo moments are taken from the stored results of summarize, r(mean) and
* r(sd), and written to EVERY observation. Previously only the placebo rows
* received the computed values, while all other rows were filled with figures
* copied by hand from the log, rounded to at most seven digits (the asset sd even
* to 15100000 from "1.51e+07"), so treated and placebo observations were
* standardized with slightly different constants.
*
* Reference values from the original log (124 placebo observations):
*     variable            mean        sd
*     education_goal      19.23387    2.862312
*     income_goal_1000    904022.3    9045167
*     status_goal         9.17742     10.6573
*     asset_goal_1000     3230386     1.51e+07

global base_asp "education_goal income_goal_1000 status_goal asset_goal_1000"

* Placebo-group mean of each dimension
foreach x of global base_asp {
	quietly summarize `x' if treatment == 0
	generate `x'_placebo_mean = r(mean)
}

foreach x of global base_asp {
	tab `x'_placebo_mean
}

* Placebo-group standard deviation of each dimension
foreach x of global base_asp {
	quietly summarize `x' if treatment == 0
	generate `x'_placebo_sd = r(sd)
}

foreach x of global base_asp {
	tab `x'_placebo_sd
}

* Standardized measure for each dimension
foreach x of global base_asp {
	generate `x'_stand = (`x' - `x'_placebo_mean)/`x'_placebo_sd
}

foreach x of global base_asp {
	tab `x'_stand
	sum `x'_stand
}
			
			
			
			

* Missing values per standardized dimension
tab education_goal_stand, m
tab income_goal_1000_stand, m
tab asset_goal_1000_stand, m
tab status_goal_stand, m

* Replace missings with the mean value of the participant's treatment group.
*
* The group means are taken from r(mean) instead of being typed in by hand. The
* hand-copied constants had the wrong sign for the placebo group (for example a
* mean of -1.99e-07 was entered as +.000000199) and go stale whenever the sample
* or the standardization changes.
*
* Group means in the original log (treatment 0 / 1 / 2):
*     education_goal_stand     -1.99e-07 /  .2484165 /  .3369326
*     income_goal_1000_stand    1.68e-09 / -.0945534 /  1725.544
*     asset_goal_1000_stand    -3.42e-09 /  .2290761 /  63.73836
*     status_goal_stand        -2.68e-08 / -.0024546 / -.0514304
* NOTE(review): the treatment-2 means of the income and asset dimensions are
* very large for standardized variables, which suggests extreme outliers -
* confirm that imputing these values is intended.
foreach dim in education_goal_stand income_goal_1000_stand asset_goal_1000_stand status_goal_stand {
	forvalues arm = 0/2 {
		summarize `dim' if treatment == `arm'
		replace `dim' = r(mean) if `dim' == . & treatment == `arm'
	}
}



			
			
			
			
*weight the dimensions
*
* Each dimension's weight is the control-arm (treatment == 0) mean of
* weight_X/20, applied to every observation. The mean is read from r(mean)
* rather than copied by hand from the log, so the weights cannot drift away
* from the data.
* Values recorded by the authors, for reference:
*   edu .378629   inc .22125   assets .197379   social .2021368
* NOTE(review): the divisor 20 presumably reflects weights being allocated out
* of 20 points -- TODO confirm against the questionnaire.

//use the percentage of the weight
foreach d in edu inc assets social {
	// share of the weight, defined for the control arm only at this point
	generate perc_weight_`d'2 = weight_`d'/20 if treatment == 0
	// only control-arm rows are non-missing, so this is the control-arm mean
	summarize perc_weight_`d'2
	// give every observation (all arms) that same control-arm mean weight
	replace perc_weight_`d'2 = r(mean)
}


// weighted contribution of each standardized dimension to the index
generate education_weight_perc = education_goal_stand*perc_weight_edu2
generate income_weight_perc = income_goal_1000_stand*perc_weight_inc2
generate asset_weight_perc = asset_goal_1000_stand*perc_weight_assets2
generate status_weight_perc = status_goal_stand*perc_weight_social2


			

*generate the social aspirations index
* The index is the sum of the four weighted, standardized dimensions.
generate asp_index = education_weight_perc + income_weight_perc + status_weight_perc + asset_weight_perc
label variable asp_index "Aspiration index"


* variable labels for the four standardized dimensions
label variable education_goal_stand "Education aspirations"
label variable income_goal_1000_stand "Income aspirations (in 1000 Ariary)"
label variable asset_goal_1000_stand "Asset aspirations (in 1000 Ariary)"
label variable status_goal_stand "Status aspirations"


* inspect the distribution of the index
histogram asp_index
tabulate asp_index, missing


* Trimmed copies of the index: values above the 99th / 95th percentile are set
* to missing (authors' log: 3 and 18 observations respectively; the tabulation
* of asp_index95 with missings was annotated 339).
foreach pct in 99 95 {
	generate asp_index`pct' = asp_index
	summarize asp_index`pct', detail
	// missing values also compare as larger than r(p`pct'), but they are
	// already missing, so only genuine upper-tail values change
	replace asp_index`pct' = . if asp_index`pct' > r(p`pct')
	if `pct' == 99 {
		tabulate asp_index`pct'
	}
	else {
		tabulate asp_index`pct', missing
	}
	label variable asp_index`pct' "Asp. index"
}




*Current Index
*
* Step 1: mean of each achieved dimension in the control arm (treatment == 0),
* stored in <dimension>_pl_mean for EVERY observation.
*
* The mean is read from r(mean) after -summarize- and assigned to all rows at
* once. Previously the control rows received the mean from -egen- while all
* other rows were filled with a hand-typed, rounded copy of the logged value,
* so the two groups could be standardized against slightly different
* constants, and the typed values would go stale if the data changed.
*
* Values recorded by the authors (124 control observations), for reference:
*   income_achieved_1000  294.4772
*   status_achieved       4.427419
*   asset_achieved_1000   69421.29



global base_asp "income_achieved_1000 status_achieved asset_achieved_1000"


foreach x in $base_asp {
	// control-arm mean, shown in the log
	summarize `x' if treatment == 0
	// same constant for all observations, whatever their treatment arm
	generate `x'_pl_mean = r(mean)
}

// quick check: each variable should hold exactly one distinct value
foreach x in $base_asp {
	tab `x'_pl_mean
}



* Step 2: standard deviation of each achieved dimension in the control arm
* (treatment == 0), stored in <dimension>_pl_sd for EVERY observation.
*
* r(sd) from -summarize- is assigned to all rows at once, replacing the earlier
* pattern of -egen- on the control rows plus a hand-typed, rounded fill for the
* remaining rows (for example 198286 carried only six significant digits).
*
* Values recorded by the authors (124 control observations), for reference:
*   income_achieved_1000  2702.24
*   status_achieved       2.02494
*   asset_achieved_1000   198286

foreach x in $base_asp {
	// control-arm standard deviation, shown in the log
	summarize `x' if treatment == 0
	// same constant for all observations, whatever their treatment arm
	generate `x'_pl_sd = r(sd)
}


// quick check: each variable should hold exactly one distinct value
foreach x in $base_asp {
	tab `x'_pl_sd
}



						
			
*generate the standardized measure for each dimension
* Each achieved dimension is centred on <dimension>_pl_mean and scaled by
* <dimension>_pl_sd; the result is stored in <dimension>_st_curr.
foreach dim of global base_asp {
	generate `dim'_st_curr = (`dim' - `dim'_pl_mean) / `dim'_pl_sd
}

* inspect the standardized dimensions
foreach dim of global base_asp {
	tabulate `dim'_st_curr
	summarize `dim'_st_curr
}


			
*generate the current index
* Unweighted sum of the three standardized achieved dimensions; the sum is
* missing whenever any one of its components is missing.
gen current_index = income_achieved_1000_st_curr + asset_achieved_1000_st_curr + status_achieved_st_curr



tab current_index, m
hist current_index

* Impute a missing index with the mean index of the observation's own
* treatment arm. The mean is read from r(mean) instead of being copied by hand
* from the log, so it cannot go stale.
* Means recorded by the authors (imputed observations in parentheses):
*   arm 0: -.0087124 (1)   arm 1: .8015294 (2)   arm 2: .4191623 (0)
forvalues t = 0/2 {
	summarize current_index if treatment == `t'
	replace current_index = r(mean) if current_index == . & treatment == `t'
}

tab income_achieved_1000_st_curr, m
tab asset_achieved_1000_st_curr, m
tab status_achieved_st_curr, m


* Impute missing standardized components with their treatment-arm mean. This
* happens after current_index has been built, so it does not feed back into
* the index above.
* Means recorded by the authors (imputed observations in parentheses):
*   income_achieved_1000_st_curr  arm 0:  2.14e-09 (1)  arm 1: -.0186534 (1)  arm 2:  .5304184 (0)
*   asset_achieved_1000_st_curr   arm 0:  2.76e-09 (0)  arm 1: -.3246046 (1)  arm 2: -.0225838 (0)
* NOTE(review): status_achieved_st_curr is tabulated above but is not imputed
* here -- confirm this is intentional (e.g. because it has no missing values).
foreach v of varlist income_achieved_1000_st_curr asset_achieved_1000_st_curr {
	forvalues t = 0/2 {
		summarize `v' if treatment == `t'
		replace `v' = r(mean) if `v' == . & treatment == `t'
	}
}





*******SAVE DATASET:
* Write the data currently in memory to Data/processed/school_final under
* ${Directory}; -replace- overwrites an existing file of the same name.

save "${Directory}Data/processed/school_final", replace



